# Goal: Create Figures A1 & A2 
# by Jennifer Pan and Yiqing Xu

## Plotting

# Remove every object (including dot-prefixed ones) from the environment the
# script is run in before anything else happens. `all.names` is written out in
# full; the previous `all = TRUE` only worked through partial argument
# matching.
# NOTE(review): this also deletes the workspace of anyone who source()s this
# file, and it does not detach packages or reset options -- running the script
# in a fresh R session is the more reliable way to get a clean state.
rm(list = ls(all.names = TRUE))

library(haven)
library(ggplot2)
library(scales)
library(dplyr)
# library(tinytex)
# library(stringr)
# library(reshape)


########## Figure A1 (Sample 1) ##########

# Load Sample 1 and recode the demographic variables plotted in Figure A1.
d <- haven::read_dta("data/sample1.dta")
names(d)

# Gender: turn the 0/1 `female` indicator into a labelled factor.
d$gender <- factor(
  d$female,
  levels = c(0, 1),
  labels = c("Male", "Female")
)

# Education: fold the two outermost codes into their neighbours
# (6 -> 9 and 21 -> 18) so that exactly six levels remain, then label them.
# which() keeps NA comparisons out of the assignment index.
d$eduyr <- replace(d$eduyr, which(d$eduyr == 6), 9)
d$eduyr <- replace(d$eduyr, which(d$eduyr == 21), 18)
d$eduyr <- factor(
  d$eduyr,
  levels = c(9, 11, 12, 15, 16, 18),
  labels = c(
    "Middle School or Below",
    "Vocational High School or equivalent",
    "Regular High School",
    "Three-year College Degree",
    "Four-year College Degree",
    "Post-graduate Degrees"
  )
)
table(d$eduyr)

# Income: codes 1-9 are the brackets below; missing answers are given their
# own code (10) so they become an explicit "NA" level of the factor.
d$income <- replace(d$income, which(is.na(d$income)), 10)
d$income <- factor(
  d$income,
  levels = 1:10,
  labels = c(
    "<1000", "1001-2000", "2001-3000", "3001-5000", "5001-8000",
    "8001-12000", "12001-20000", "20001-50000", ">50000", "NA"
  )
)
table(d$income)

# Age: five-year bands. For whole-number ages, floor((age - 1) / 5) - 2 maps
# 16-20 -> 1, 21-25 -> 2, ..., 56-60 -> 9; any value outside 1-9 becomes NA
# when the factor is built.
# NOTE(review): band 1 is labelled "18-20" although the formula also sends
# ages 16-17 there -- presumably the sample has no respondents under 18;
# confirm against the data.
d$agegp <- factor(
  floor((d$age - 1) / 5) - 2,
  levels = 1:9,
  labels = c(
    "18-20", "21-25", "26-30", "31-35", "36-40",
    "41-45", "46-50", "51-55", "56-60"
  )
)
table(d$agegp)

# Figure A1, education panel: share of Sample 1 respondents at each education
# level, with the percentage printed above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
edu <- ggplot(data = d, aes(x = factor(eduyr))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-42% range
  scale_y_continuous(labels = percent, limits = c(0, 0.42)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the long category names so they do not overlap
    axis.text.x = element_text(angle = 40, hjust = 1)
  )
ggsave("graphs/stat_q_edu.pdf", edu, height = 5, width = 5)

# Figure A1, income panel: share of Sample 1 respondents in each income
# bracket (including the explicit "NA" bracket), with the percentage printed
# above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
income <- ggplot(data = d, aes(x = factor(income))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-25% range
  scale_y_continuous(labels = percent, limits = c(0, 0.25)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the bracket labels so they do not overlap
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave("graphs/stat_q_income.pdf", income, height = 5, width = 5)


# Figure A1, age panel: share of Sample 1 respondents in each age band, with
# the percentage printed above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
age <- ggplot(data = d, aes(x = factor(agegp))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-25% range
  scale_y_continuous(labels = percent, limits = c(0, 0.25)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the band labels so they do not overlap
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave("graphs/stat_q_age.pdf", age, height = 5, width = 5)

########## Figure A2 (Sample 2) ##########

# Load Sample 2 and recode the demographic variables plotted in Figure A2.
# `d` is reused, so the Sample 1 data is no longer available after this point.
d <- haven::read_dta("data/sample2.dta")
names(d)

# Gender: turn the 0/1 `female` indicator into a labelled factor.
d$gender <- factor(
  d$female,
  levels = c(0, 1),
  labels = c("Male", "Female")
)

# Education: fold code 21 into 18, then label the six remaining levels.
# which() keeps NA comparisons out of the assignment index.
# NOTE(review): the Sample 1 section also recodes 6 -> 9 before building the
# factor; here a value of 6 would fall outside `levels` and turn into NA.
# Presumably Sample 2 contains no such value -- confirm against the data.
d$eduyr <- replace(d$eduyr, which(d$eduyr == 21), 18)
d$eduyr <- factor(
  d$eduyr,
  levels = c(9, 11, 12, 15, 16, 18),
  labels = c(
    "Middle School or Below",
    "Vocational High School or equivalent",
    "Regular High School",
    "Three-year College Degree",
    "Four-year College Degree",
    "Post-graduate Degrees"
  )
)
table(d$eduyr)

# Income: codes 1-9 are the brackets below; missing answers are given their
# own code (10) so they become an explicit "NA" level of the factor.
d$income <- replace(d$income, which(is.na(d$income)), 10)
d$income <- factor(
  d$income,
  levels = 1:10,
  labels = c(
    "<1000", "1001-2000", "2001-3000", "3001-5000", "5001-8000",
    "8001-12000", "12001-20000", "20001-50000", ">50000", "NA"
  )
)
table(d$income)

# Age: five-year bands. For whole-number ages, floor((age - 1) / 5) - 2 maps
# 16-20 -> 1, 21-25 -> 2, ..., 46-50 -> 7; any value outside 1-7 becomes NA
# when the factor is built.
# NOTE(review): only seven bands are declared here, so ages above 50 would be
# turned into NA, and ages 16-17 would land in the band labelled "18-20" --
# presumably neither occurs in this sample; confirm against the data.
d$agegp <- factor(
  floor((d$age - 1) / 5) - 2,
  levels = 1:7,
  labels = c(
    "18-20", "21-25", "26-30", "31-35", "36-40", "41-45", "46-50"
  )
)
table(d$agegp)

# Figure A2, education panel: share of Sample 2 respondents at each education
# level, with the percentage printed above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
edu <- ggplot(data = d, aes(x = factor(eduyr))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-42% range
  scale_y_continuous(labels = percent, limits = c(0, 0.42)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the long category names so they do not overlap
    axis.text.x = element_text(angle = 40, hjust = 1)
  )
ggsave("graphs/stat_s_edu.pdf", edu, height = 5, width = 5)

# Figure A2, income panel: share of Sample 2 respondents in each income
# bracket (including the explicit "NA" bracket), with the percentage printed
# above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
income <- ggplot(data = d, aes(x = factor(income))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-25% range
  scale_y_continuous(labels = percent, limits = c(0, 0.25)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the bracket labels so they do not overlap
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave("graphs/stat_s_income.pdf", income, height = 5, width = 5)


# Figure A2, age panel: share of Sample 2 respondents in each age band, with
# the percentage printed above every bar.
# Bar heights and labels are count / sum(count) taken from the "count" stat.
# after_stat() replaces the deprecated `..count..` notation; the plotted
# values are the same.
age <- ggplot(data = d, aes(x = factor(agegp))) +
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  geom_text(
    stat = "count",
    aes(
      y = after_stat(count / sum(count)),
      label = after_stat(scales::percent(count / sum(count)))
    ),
    vjust = -0.4, # place the label just above the top of the bar
    size = 3
  ) +
  # y axis printed as percentages and fixed to the 0-25% range
  scale_y_continuous(labels = percent, limits = c(0, 0.25)) +
  labs(x = "", y = "Percentage") +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 15),
    # slant the band labels so they do not overlap
    axis.text.x = element_text(angle = 30, hjust = 1)
  )
ggsave("graphs/stat_s_age.pdf", age, height = 5, width = 5)


